In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 
In [2]:
path=r'F:\Python_Data_Analyis_Project\additional_data-20230323T140537Z-001\additional_data'
In [3]:
Files=os.listdir(path)
In [4]:
Files_csv= [Files[i] for i in range (0, len(Files),2)]
In [5]:
Files_csv[0].split('.')[0][0:2]
Out[5]:
'CA'
In [6]:
Full_df=pd.DataFrame()
for file in Files_csv:
    current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
    current_df['Country']= file.split('.')[0][0:2]
    Full_df= pd.concat([Full_df,current_df])
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\2314862919.py:3: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\2314862919.py:3: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\2314862919.py:3: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\2314862919.py:3: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\2314862919.py:3: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\2314862919.py:3: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\2314862919.py:3: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\2314862919.py:3: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\2314862919.py:3: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\2314862919.py:3: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  current_df= pd.read_csv(path +'/'+ file, encoding= 'iso-8859-1', error_bad_lines=False)
In [7]:
Full_df.head()
Out[7]:
video_id trending_date title channel_title category_id publish_time tags views likes dislikes comment_count thumbnail_link comments_disabled ratings_disabled video_error_or_removed description Country
0 n1WpP7iowLc 17.14.11 Eminem - Walk On Water (Audio) ft. Beyoncé EminemVEVO 10 2017-11-10T17:00:03.000Z Eminem|"Walk"|"On"|"Water"|"Aftermath/Shady/In... 17158579 787425 43420 125882 https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg False False False Eminem's new track Walk on Water ft. Beyoncé ... CA
1 0dBIkQ4Mz1M 17.14.11 PLUSH - Bad Unboxing Fan Mail iDubbbzTV 23 2017-11-13T17:00:00.000Z plush|"bad unboxing"|"unboxing"|"fan mail"|"id... 1014651 127794 1688 13030 https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg False False False STill got a lot of packages. Probably will las... CA
2 5qpjK5DgCt4 17.14.11 Racist Superman | Rudy Mancuso, King Bach & Le... Rudy Mancuso 23 2017-11-12T19:05:24.000Z racist superman|"rudy"|"mancuso"|"king"|"bach"... 3191434 146035 5339 8181 https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg False False False WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► ... CA
3 d380meD0W0M 17.14.11 I Dare You: GOING BALD!? nigahiga 24 2017-11-12T18:01:41.000Z ryan|"higa"|"higatv"|"nigahiga"|"i dare you"|"... 2095828 132239 1989 17518 https://i.ytimg.com/vi/d380meD0W0M/default.jpg False False False I know it's been a while since we did this sho... CA
4 2Vv-BfVoq4g 17.14.11 Ed Sheeran - Perfect (Official Music Video) Ed Sheeran 10 2017-11-09T11:04:14.000Z edsheeran|"ed sheeran"|"acoustic"|"live"|"cove... 33523622 1634130 21082 85067 https://i.ytimg.com/vi/2Vv-BfVoq4g/default.jpg False False False 🎧: https://ad.gt/yt-perfect\n💰: https://... CA
In [8]:
Full_df.shape
Out[8]:
(375942, 17)

Which Cateogry has maximum Likes?¶

In [9]:
Full_df['category_id'].unique()
Out[9]:
array([10, 23, 24, 25, 22, 26,  1, 28, 20, 17, 29, 15, 19,  2, 27, 43, 30,
       44], dtype=int64)
In [10]:
Cat= pd.read_csv(r'F:\Python_Data_Analyis_Project\category_file.txt',sep=': \t\t')
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\1417910590.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.
  Cat= pd.read_csv(r'F:\Python_Data_Analyis_Project\category_file.txt',sep=': \t\t')
In [11]:
Cat
Out[11]:
Category_id Category_name
1 Film & Animation
2 Autos & Vehicles
10 Music
15 Pets & Animals
17 Sports
18 Short Movies
19 Travel & Events
20 Gaming
21 Videoblogging
22 People & Blogs
23 Comedy
24 Entertainment
25 News & Politics
26 Howto & Style
27 Education
28 Science & Technology
29 Nonprofits & Activism
30 Movies
31 Anime/Animation
32 Action/Adventure
33 Classics
34 Comedy
35 Documentary
36 Drama
37 Family
38 Foreign
39 Horror
40 Sci-Fi/Fantasy
41 Thriller
42 Shorts
43 Shows
44 Trailers
In [12]:
Cat.reset_index(inplace=True)
In [13]:
Cat.columns=['category_id','category_name']
In [14]:
Cat
Out[14]:
category_id category_name
0 1 Film & Animation
1 2 Autos & Vehicles
2 10 Music
3 15 Pets & Animals
4 17 Sports
5 18 Short Movies
6 19 Travel & Events
7 20 Gaming
8 21 Videoblogging
9 22 People & Blogs
10 23 Comedy
11 24 Entertainment
12 25 News & Politics
13 26 Howto & Style
14 27 Education
15 28 Science & Technology
16 29 Nonprofits & Activism
17 30 Movies
18 31 Anime/Animation
19 32 Action/Adventure
20 33 Classics
21 34 Comedy
22 35 Documentary
23 36 Drama
24 37 Family
25 38 Foreign
26 39 Horror
27 40 Sci-Fi/Fantasy
28 41 Thriller
29 42 Shorts
30 43 Shows
31 44 Trailers
In [15]:
Cat.set_index('category_id', inplace=True)
In [16]:
Cat
Out[16]:
category_name
category_id
1 Film & Animation
2 Autos & Vehicles
10 Music
15 Pets & Animals
17 Sports
18 Short Movies
19 Travel & Events
20 Gaming
21 Videoblogging
22 People & Blogs
23 Comedy
24 Entertainment
25 News & Politics
26 Howto & Style
27 Education
28 Science & Technology
29 Nonprofits & Activism
30 Movies
31 Anime/Animation
32 Action/Adventure
33 Classics
34 Comedy
35 Documentary
36 Drama
37 Family
38 Foreign
39 Horror
40 Sci-Fi/Fantasy
41 Thriller
42 Shorts
43 Shows
44 Trailers
In [17]:
dct=Cat.to_dict()
In [18]:
dct['category_name']
Out[18]:
{1: 'Film & Animation',
 2: 'Autos & Vehicles',
 10: 'Music',
 15: 'Pets & Animals',
 17: 'Sports',
 18: 'Short Movies',
 19: 'Travel & Events',
 20: 'Gaming',
 21: 'Videoblogging',
 22: 'People & Blogs',
 23: 'Comedy',
 24: 'Entertainment',
 25: 'News & Politics',
 26: 'Howto & Style',
 27: 'Education',
 28: 'Science & Technology',
 29: 'Nonprofits & Activism',
 30: 'Movies',
 31: 'Anime/Animation',
 32: 'Action/Adventure',
 33: 'Classics',
 34: 'Comedy',
 35: 'Documentary',
 36: 'Drama',
 37: 'Family',
 38: 'Foreign',
 39: 'Horror',
 40: 'Sci-Fi/Fantasy',
 41: 'Thriller',
 42: 'Shorts',
 43: 'Shows',
 44: 'Trailers'}
In [19]:
Full_df['category_name']=Full_df['category_id'].map(dct['category_name'])
In [20]:
Full_df.columns
Out[20]:
Index(['video_id', 'trending_date', 'title', 'channel_title', 'category_id',
       'publish_time', 'tags', 'views', 'likes', 'dislikes', 'comment_count',
       'thumbnail_link', 'comments_disabled', 'ratings_disabled',
       'video_error_or_removed', 'description', 'Country', 'category_name'],
      dtype='object')
In [21]:
Full_df.head(4)
Out[21]:
video_id trending_date title channel_title category_id publish_time tags views likes dislikes comment_count thumbnail_link comments_disabled ratings_disabled video_error_or_removed description Country category_name
0 n1WpP7iowLc 17.14.11 Eminem - Walk On Water (Audio) ft. Beyoncé EminemVEVO 10 2017-11-10T17:00:03.000Z Eminem|"Walk"|"On"|"Water"|"Aftermath/Shady/In... 17158579 787425 43420 125882 https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg False False False Eminem's new track Walk on Water ft. Beyoncé ... CA Music
1 0dBIkQ4Mz1M 17.14.11 PLUSH - Bad Unboxing Fan Mail iDubbbzTV 23 2017-11-13T17:00:00.000Z plush|"bad unboxing"|"unboxing"|"fan mail"|"id... 1014651 127794 1688 13030 https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg False False False STill got a lot of packages. Probably will las... CA Comedy
2 5qpjK5DgCt4 17.14.11 Racist Superman | Rudy Mancuso, King Bach & Le... Rudy Mancuso 23 2017-11-12T19:05:24.000Z racist superman|"rudy"|"mancuso"|"king"|"bach"... 3191434 146035 5339 8181 https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg False False False WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► ... CA Comedy
3 d380meD0W0M 17.14.11 I Dare You: GOING BALD!? nigahiga 24 2017-11-12T18:01:41.000Z ryan|"higa"|"higatv"|"nigahiga"|"i dare you"|"... 2095828 132239 1989 17518 https://i.ytimg.com/vi/d380meD0W0M/default.jpg False False False I know it's been a while since we did this sho... CA Entertainment
In [22]:
plt.figure(figsize=(15,10))
sns.boxplot(x="category_name", y="likes", data= Full_df)
plt.xticks(rotation="vertical")
Out[22]:
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17]),
 [Text(0, 0, 'Music'),
  Text(1, 0, 'Comedy'),
  Text(2, 0, 'Entertainment'),
  Text(3, 0, 'News & Politics'),
  Text(4, 0, 'People & Blogs'),
  Text(5, 0, 'Howto & Style'),
  Text(6, 0, 'Film & Animation'),
  Text(7, 0, 'Science & Technology'),
  Text(8, 0, 'Gaming'),
  Text(9, 0, 'Sports'),
  Text(10, 0, 'Nonprofits & Activism'),
  Text(11, 0, 'Pets & Animals'),
  Text(12, 0, 'Travel & Events'),
  Text(13, 0, 'Autos & Vehicles'),
  Text(14, 0, 'Education'),
  Text(15, 0, 'Shows'),
  Text(16, 0, 'Movies'),
  Text(17, 0, 'Trailers')])
In [23]:
sns.barplot(x="category_name", y="likes", data=Full_df)
plt.xticks(rotation="vertical")
Out[23]:
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17]),
 [Text(0, 0, 'Music'),
  Text(1, 0, 'Comedy'),
  Text(2, 0, 'Entertainment'),
  Text(3, 0, 'News & Politics'),
  Text(4, 0, 'People & Blogs'),
  Text(5, 0, 'Howto & Style'),
  Text(6, 0, 'Film & Animation'),
  Text(7, 0, 'Science & Technology'),
  Text(8, 0, 'Gaming'),
  Text(9, 0, 'Sports'),
  Text(10, 0, 'Nonprofits & Activism'),
  Text(11, 0, 'Pets & Animals'),
  Text(12, 0, 'Travel & Events'),
  Text(13, 0, 'Autos & Vehicles'),
  Text(14, 0, 'Education'),
  Text(15, 0, 'Shows'),
  Text(16, 0, 'Movies'),
  Text(17, 0, 'Trailers')])
In [24]:
new_df= Full_df[["category_name","likes"]]
In [25]:
new_df=new_df.groupby(['category_name']).sum()
In [26]:
new_df
Out[26]:
likes
category_name
Autos & Vehicles 45461895
Comedy 1081392644
Education 117479047
Entertainment 2857743591
Film & Animation 589885590
Gaming 298337663
Howto & Style 347338295
Movies 1005417
Music 7227198427
News & Politics 163503422
Nonprofits & Activism 93538593
People & Blogs 692550961
Pets & Animals 56601492
Science & Technology 252570921
Shows 4570827
Sports 399630743
Trailers 198
Travel & Events 13494079
In [27]:
sns.heatmap(new_df)
Out[27]:
<AxesSubplot:ylabel='category_name'>

Let's find out whether audiences are engaging or not.¶

In [28]:
Full_df.head(4)
Out[28]:
video_id trending_date title channel_title category_id publish_time tags views likes dislikes comment_count thumbnail_link comments_disabled ratings_disabled video_error_or_removed description Country category_name
0 n1WpP7iowLc 17.14.11 Eminem - Walk On Water (Audio) ft. Beyoncé EminemVEVO 10 2017-11-10T17:00:03.000Z Eminem|"Walk"|"On"|"Water"|"Aftermath/Shady/In... 17158579 787425 43420 125882 https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg False False False Eminem's new track Walk on Water ft. Beyoncé ... CA Music
1 0dBIkQ4Mz1M 17.14.11 PLUSH - Bad Unboxing Fan Mail iDubbbzTV 23 2017-11-13T17:00:00.000Z plush|"bad unboxing"|"unboxing"|"fan mail"|"id... 1014651 127794 1688 13030 https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg False False False STill got a lot of packages. Probably will las... CA Comedy
2 5qpjK5DgCt4 17.14.11 Racist Superman | Rudy Mancuso, King Bach & Le... Rudy Mancuso 23 2017-11-12T19:05:24.000Z racist superman|"rudy"|"mancuso"|"king"|"bach"... 3191434 146035 5339 8181 https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg False False False WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► ... CA Comedy
3 d380meD0W0M 17.14.11 I Dare You: GOING BALD!? nigahiga 24 2017-11-12T18:01:41.000Z ryan|"higa"|"higatv"|"nigahiga"|"i dare you"|"... 2095828 132239 1989 17518 https://i.ytimg.com/vi/d380meD0W0M/default.jpg False False False I know it's been a while since we did this sho... CA Entertainment
In [29]:
Full_df["likes_rate"]=Full_df["likes"]/Full_df["views"]
In [30]:
Full_df["dislikes_rate"]=Full_df["likes"]/Full_df["views"]
In [31]:
Full_df["comments_rate"]=Full_df["comment_count"]/Full_df["views"]
In [32]:
Engage_df= Full_df[["category_name","likes_rate","dislikes_rate","comments_rate"]]
In [33]:
Engage_df
Out[33]:
category_name likes_rate dislikes_rate comments_rate
0 Music 0.045891 0.045891 0.007336
1 Comedy 0.125949 0.125949 0.012842
2 Comedy 0.045758 0.045758 0.002563
3 Entertainment 0.063096 0.063096 0.008359
4 Music 0.048746 0.048746 0.002538
... ... ... ... ...
40944 Pets & Animals 0.022639 0.022639 0.001576
40945 People & Blogs 0.056356 0.056356 0.003696
40946 Entertainment 0.045073 0.045073 0.003743
40947 Film & Animation 0.034086 0.034086 0.002312
40948 Gaming 0.034647 0.034647 0.014049

375942 rows × 4 columns

In [34]:
df2 = Engage_df.groupby('category_name')['likes_rate'].sum()
In [35]:
df2
Out[35]:
category_name
Autos & Vehicles          200.654464
Comedy                   1369.307565
Education                 389.849038
Entertainment            3281.569795
Film & Animation          587.231379
Gaming                    542.190552
Howto & Style            1006.657259
Movies                      0.622599
Music                    2029.814627
News & Politics           822.715914
Nonprofits & Activism     112.013650
People & Blogs           1872.317994
Pets & Animals            209.070391
Science & Technology      392.502954
Shows                      11.169937
Sports                    494.549438
Trailers                    0.031669
Travel & Events            67.590184
Name: likes_rate, dtype: float64
In [36]:
Full_df.drop(["likes_rate","dislikes_rate","comments_rate"], axis=1)
Out[36]:
video_id trending_date title channel_title category_id publish_time tags views likes dislikes comment_count thumbnail_link comments_disabled ratings_disabled video_error_or_removed description Country category_name
0 n1WpP7iowLc 17.14.11 Eminem - Walk On Water (Audio) ft. Beyoncé EminemVEVO 10 2017-11-10T17:00:03.000Z Eminem|"Walk"|"On"|"Water"|"Aftermath/Shady/In... 17158579 787425 43420 125882 https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg False False False Eminem's new track Walk on Water ft. Beyoncé ... CA Music
1 0dBIkQ4Mz1M 17.14.11 PLUSH - Bad Unboxing Fan Mail iDubbbzTV 23 2017-11-13T17:00:00.000Z plush|"bad unboxing"|"unboxing"|"fan mail"|"id... 1014651 127794 1688 13030 https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg False False False STill got a lot of packages. Probably will las... CA Comedy
2 5qpjK5DgCt4 17.14.11 Racist Superman | Rudy Mancuso, King Bach & Le... Rudy Mancuso 23 2017-11-12T19:05:24.000Z racist superman|"rudy"|"mancuso"|"king"|"bach"... 3191434 146035 5339 8181 https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg False False False WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► ... CA Comedy
3 d380meD0W0M 17.14.11 I Dare You: GOING BALD!? nigahiga 24 2017-11-12T18:01:41.000Z ryan|"higa"|"higatv"|"nigahiga"|"i dare you"|"... 2095828 132239 1989 17518 https://i.ytimg.com/vi/d380meD0W0M/default.jpg False False False I know it's been a while since we did this sho... CA Entertainment
4 2Vv-BfVoq4g 17.14.11 Ed Sheeran - Perfect (Official Music Video) Ed Sheeran 10 2017-11-09T11:04:14.000Z edsheeran|"ed sheeran"|"acoustic"|"live"|"cove... 33523622 1634130 21082 85067 https://i.ytimg.com/vi/2Vv-BfVoq4g/default.jpg False False False 🎧: https://ad.gt/yt-perfect\n💰: https://... CA Music
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
40944 BZt0qjTWNhw 18.14.06 The Cat Who Caught the Laser AaronsAnimals 15 2018-05-18T13:00:04.000Z aarons animals|"aarons"|"animals"|"cat"|"cats"... 1685609 38160 1385 2657 https://i.ytimg.com/vi/BZt0qjTWNhw/default.jpg False False False The Cat Who Caught the Laser - Aaron's Animals US Pets & Animals
40945 1h7KV2sjUWY 18.14.06 True Facts : Ant Mutualism zefrank1 22 2018-05-18T01:00:06.000Z [none] 1064798 60008 382 3936 https://i.ytimg.com/vi/1h7KV2sjUWY/default.jpg False False False NaN US People & Blogs
40946 D6Oy4LfoqsU 18.14.06 I GAVE SAFIYA NYGAARD A PERFECT HAIR MAKEOVER ... Brad Mondo 24 2018-05-18T17:34:22.000Z I gave safiya nygaard a perfect hair makeover ... 1066451 48068 1032 3992 https://i.ytimg.com/vi/D6Oy4LfoqsU/default.jpg False False False I had so much fun transforming Safiyas hair in... US Entertainment
40947 oV0zkMe1K8s 18.14.06 How Black Panther Should Have Ended How It Should Have Ended 1 2018-05-17T17:00:04.000Z Black Panther|"HISHE"|"Marvel"|"Infinity War"|... 5660813 192957 2846 13088 https://i.ytimg.com/vi/oV0zkMe1K8s/default.jpg False False False How Black Panther Should Have EndedWatch More ... US Film & Animation
40948 ooyjaVdt-jA 18.14.06 Official Call of Duty®: Black Ops 4 — Mult... Call of Duty 20 2018-05-17T17:09:38.000Z call of duty|"cod"|"activision"|"Black Ops 4" 10306119 357079 212976 144795 https://i.ytimg.com/vi/ooyjaVdt-jA/default.jpg False False False Call of Duty: Black Ops 4 Multiplayer raises t... US Gaming

375942 rows × 18 columns

In [37]:
Engage_df= Full_df[["category_name","likes","dislikes","comment_count","views"]]
In [38]:
Engage_df
Out[38]:
category_name likes dislikes comment_count views
0 Music 787425 43420 125882 17158579
1 Comedy 127794 1688 13030 1014651
2 Comedy 146035 5339 8181 3191434
3 Entertainment 132239 1989 17518 2095828
4 Music 1634130 21082 85067 33523622
... ... ... ... ... ...
40944 Pets & Animals 38160 1385 2657 1685609
40945 People & Blogs 60008 382 3936 1064798
40946 Entertainment 48068 1032 3992 1066451
40947 Film & Animation 192957 2846 13088 5660813
40948 Gaming 357079 212976 144795 10306119

375942 rows × 5 columns

In [39]:
Engage_df= Engage_df.groupby('category_name', as_index=True).sum()
In [ ]:
 
In [40]:
Engage_df["likes_rate"]=(Engage_df["likes"]/Engage_df["views"])*100
In [41]:
Engage_df
Out[41]:
likes dislikes comment_count views likes_rate
category_name
Autos & Vehicles 45461895 2571460 5957385 1661853766 2.735613
Comedy 1081392644 40698333 110064836 22050866339 4.904082
Education 117479047 3710565 13547175 2734841410 4.295644
Entertainment 2857743591 248270342 407583131 104517467253 2.734226
Film & Animation 589885590 25279207 65387125 27619347901 2.135769
Gaming 298337663 19534374 48853951 7730729502 3.859114
Howto & Style 347338295 12756984 49180936 9771031927 3.554776
Movies 1005417 50242 55832 70359777 1.428966
Music 7227198427 294657819 620030515 255967088943 2.823487
News & Politics 163503422 28778398 45391130 10422502991 1.568754
Nonprofits & Activism 93538593 24670453 26902565 1219859213 7.667983
People & Blogs 692550961 56634003 100889458 23600365409 2.934493
Pets & Animals 56601492 1503766 8103678 2008474231 2.818134
Science & Technology 252570921 12218574 47344743 9194715151 2.746914
Shows 4570827 682505 641365 444064556 1.029316
Sports 399630743 26536025 46998109 18972425164 2.106377
Trailers 198 9 2 55043 0.359719
Travel & Events 13494079 739962 2240753 726674959 1.856962
In [42]:
Engage_df["dislikes_rate"]=(Engage_df["dislikes"]/Engage_df["views"])*100
In [43]:
Engage_df
Out[43]:
likes dislikes comment_count views likes_rate dislikes_rate
category_name
Autos & Vehicles 45461895 2571460 5957385 1661853766 2.735613 0.154734
Comedy 1081392644 40698333 110064836 22050866339 4.904082 0.184566
Education 117479047 3710565 13547175 2734841410 4.295644 0.135678
Entertainment 2857743591 248270342 407583131 104517467253 2.734226 0.237540
Film & Animation 589885590 25279207 65387125 27619347901 2.135769 0.091527
Gaming 298337663 19534374 48853951 7730729502 3.859114 0.252685
Howto & Style 347338295 12756984 49180936 9771031927 3.554776 0.130559
Movies 1005417 50242 55832 70359777 1.428966 0.071407
Music 7227198427 294657819 620030515 255967088943 2.823487 0.115116
News & Politics 163503422 28778398 45391130 10422502991 1.568754 0.276118
Nonprofits & Activism 93538593 24670453 26902565 1219859213 7.667983 2.022402
People & Blogs 692550961 56634003 100889458 23600365409 2.934493 0.239971
Pets & Animals 56601492 1503766 8103678 2008474231 2.818134 0.074871
Science & Technology 252570921 12218574 47344743 9194715151 2.746914 0.132887
Shows 4570827 682505 641365 444064556 1.029316 0.153695
Sports 399630743 26536025 46998109 18972425164 2.106377 0.139866
Trailers 198 9 2 55043 0.359719 0.016351
Travel & Events 13494079 739962 2240753 726674959 1.856962 0.101828
In [44]:
Engage_df["comment_rate"]=(Engage_df["comment_count"]/Engage_df["views"])*100
In [45]:
Engage_df
Out[45]:
likes dislikes comment_count views likes_rate dislikes_rate comment_rate
category_name
Autos & Vehicles 45461895 2571460 5957385 1661853766 2.735613 0.154734 0.358478
Comedy 1081392644 40698333 110064836 22050866339 4.904082 0.184566 0.499141
Education 117479047 3710565 13547175 2734841410 4.295644 0.135678 0.495355
Entertainment 2857743591 248270342 407583131 104517467253 2.734226 0.237540 0.389967
Film & Animation 589885590 25279207 65387125 27619347901 2.135769 0.091527 0.236744
Gaming 298337663 19534374 48853951 7730729502 3.859114 0.252685 0.631945
Howto & Style 347338295 12756984 49180936 9771031927 3.554776 0.130559 0.503334
Movies 1005417 50242 55832 70359777 1.428966 0.071407 0.079352
Music 7227198427 294657819 620030515 255967088943 2.823487 0.115116 0.242231
News & Politics 163503422 28778398 45391130 10422502991 1.568754 0.276118 0.435511
Nonprofits & Activism 93538593 24670453 26902565 1219859213 7.667983 2.022402 2.205383
People & Blogs 692550961 56634003 100889458 23600365409 2.934493 0.239971 0.427491
Pets & Animals 56601492 1503766 8103678 2008474231 2.818134 0.074871 0.403474
Science & Technology 252570921 12218574 47344743 9194715151 2.746914 0.132887 0.514913
Shows 4570827 682505 641365 444064556 1.029316 0.153695 0.144431
Sports 399630743 26536025 46998109 18972425164 2.106377 0.139866 0.247718
Trailers 198 9 2 55043 0.359719 0.016351 0.003634
Travel & Events 13494079 739962 2240753 726674959 1.856962 0.101828 0.308357
In [49]:
import matplotlib.ticker as mtick
sns.barplot(x=Engage_df.index, y="likes_rate", data= Engage_df)
plt.xticks(rotation="vertical")
plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(xmax=100.0))
In [50]:
sns.regplot(x="likes",y="views", data=Engage_df)
Out[50]:
<AxesSubplot:xlabel='likes', ylabel='views'>
In [51]:
sns.heatmap(Full_df[["views","likes","dislikes"]].corr(),annot=True)
Out[51]:
<AxesSubplot:>

Which Channel has largest trending Videos?¶

In [52]:
Full_df.head()
Out[52]:
video_id trending_date title channel_title category_id publish_time tags views likes dislikes ... thumbnail_link comments_disabled ratings_disabled video_error_or_removed description Country category_name likes_rate dislikes_rate comments_rate
0 n1WpP7iowLc 17.14.11 Eminem - Walk On Water (Audio) ft. Beyoncé EminemVEVO 10 2017-11-10T17:00:03.000Z Eminem|"Walk"|"On"|"Water"|"Aftermath/Shady/In... 17158579 787425 43420 ... https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg False False False Eminem's new track Walk on Water ft. Beyoncé ... CA Music 0.045891 0.045891 0.007336
1 0dBIkQ4Mz1M 17.14.11 PLUSH - Bad Unboxing Fan Mail iDubbbzTV 23 2017-11-13T17:00:00.000Z plush|"bad unboxing"|"unboxing"|"fan mail"|"id... 1014651 127794 1688 ... https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg False False False STill got a lot of packages. Probably will las... CA Comedy 0.125949 0.125949 0.012842
2 5qpjK5DgCt4 17.14.11 Racist Superman | Rudy Mancuso, King Bach & Le... Rudy Mancuso 23 2017-11-12T19:05:24.000Z racist superman|"rudy"|"mancuso"|"king"|"bach"... 3191434 146035 5339 ... https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg False False False WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► ... CA Comedy 0.045758 0.045758 0.002563
3 d380meD0W0M 17.14.11 I Dare You: GOING BALD!? nigahiga 24 2017-11-12T18:01:41.000Z ryan|"higa"|"higatv"|"nigahiga"|"i dare you"|"... 2095828 132239 1989 ... https://i.ytimg.com/vi/d380meD0W0M/default.jpg False False False I know it's been a while since we did this sho... CA Entertainment 0.063096 0.063096 0.008359
4 2Vv-BfVoq4g 17.14.11 Ed Sheeran - Perfect (Official Music Video) Ed Sheeran 10 2017-11-09T11:04:14.000Z edsheeran|"ed sheeran"|"acoustic"|"live"|"cove... 33523622 1634130 21082 ... https://i.ytimg.com/vi/2Vv-BfVoq4g/default.jpg False False False 🎧: https://ad.gt/yt-perfect\n💰: https://... CA Music 0.048746 0.048746 0.002538

5 rows × 21 columns

In [53]:
cdf=Full_df.groupby("channel_title")["video_id"].count().sort_values(ascending=False).to_frame().reset_index().rename(columns={"video_id":"total_videos"})
In [54]:
cdf
Out[54]:
channel_title total_videos
0 The Late Show with Stephen Colbert 984
1 WWE 804
2 Late Night with Seth Meyers 773
3 VikatanTV 763
4 TheEllenShow 743
... ... ...
37819 LIGHTS - 001 jrny 1
37820 bangtanist 1
37821 LIGAMX Femenil 1
37822 LIGA COLOMBIANA OFICIAL 1
37823 Pavel Sidorik TV 1

37824 rows × 2 columns

In [55]:
import plotly.express as ps
In [56]:
ps.bar(cdf[0:20], x="channel_title", y="total_videos")

Does punctuation in title and tags have the relation with views, likes, dislikes, comments?¶

In [57]:
import string
In [58]:
string.punctuation
Out[58]:
'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
In [65]:
def punc_count(x):
    return len([i for i in x if i in string.punctuation]) 
In [66]:
Full_df['title'][0]
Out[66]:
0          Eminem - Walk On Water (Audio) ft. Beyoncé
0    Sing zu Ende! | Gesangseinlagen vom Feinsten |...
0             Malika LePen : Femme de Gauche - Trailer
0        John Lewis Christmas Ad 2017 - #MozTheMonster
0    Sharry Mann: Cute Munda ( Song Teaser) | Parmi...
0    陸自ヘリ、垂直に落下=路上の車ã...
0             좋아 by 민서_윤ì¢
신_좋니 답가
0                       Capítulo 12 | MasterChef 2017
0    ЗаÑ
ар и Полина учатся эко...
0                   WE WANT TO TALK ABOUT OUR MARRIAGE
Name: title, dtype: object
In [67]:
punc_count('Eminem - Walk On Water (Audio) ft. Beyoncé')
Out[67]:
4
In [68]:
sample=Full_df[0:10000]
In [74]:
sample['punc_count']=sample['title'].apply(punc_count)
C:\Users\admin\AppData\Local\Temp\ipykernel_13876\3114645763.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [75]:
sample.head(2)
Out[75]:
video_id trending_date title channel_title category_id publish_time tags views likes dislikes ... comments_disabled ratings_disabled video_error_or_removed description Country category_name likes_rate dislikes_rate comments_rate punc_count
0 n1WpP7iowLc 17.14.11 Eminem - Walk On Water (Audio) ft. Beyoncé EminemVEVO 10 2017-11-10T17:00:03.000Z Eminem|"Walk"|"On"|"Water"|"Aftermath/Shady/In... 17158579 787425 43420 ... False False False Eminem's new track Walk on Water ft. Beyoncé ... CA Music 0.045891 0.045891 0.007336 4
1 0dBIkQ4Mz1M 17.14.11 PLUSH - Bad Unboxing Fan Mail iDubbbzTV 23 2017-11-13T17:00:00.000Z plush|"bad unboxing"|"unboxing"|"fan mail"|"id... 1014651 127794 1688 ... False False False STill got a lot of packages. Probably will las... CA Comedy 0.125949 0.125949 0.012842 1

2 rows × 22 columns

Box plot between the punctuation count and the views shows some sort of statistics.

In [76]:
sns.boxplot(x='punc_count', y='views', data=sample)
Out[76]:
<AxesSubplot:xlabel='punc_count', ylabel='views'>
In [78]:
sample['punc_count'].corr(sample['views'])
Out[78]:
0.0651000978304486

The correlation between the punction count and views is lamost 7%, this doesnot given any big conclusion with respect to their association.

In [ ]: